from time import sleep

import requests
from lxml import etree
import pandas as pd

# HTTP headers passed to requests.get() in request_url(). The User-Agent
# is a desktop browser string (Chrome/Edge on Windows 10) — presumably so
# the site serves the regular listing page instead of rejecting the
# default python-requests client; confirm if the string is changed.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 Edg/116.0.1938.69'
}


def main():
    """Scrape every generated listing page and save the combined rows.

    Each URL from create_url() is downloaded, parsed into rows, and the
    rows are accumulated in page order. The script pauses two seconds
    after every page before moving on, then hands all rows to save_data().
    """
    rows = []
    for page_url in create_url():
        page_html = request_url(page_url)
        rows.extend(parse_html(page_html))
        # Fixed pause after each page to space out consecutive requests.
        sleep(2)
    save_data(rows)


def create_url(start=20, stop=23):
    """Build the list of listing-page URLs to scrape.

    Args:
        start: First page number to include. Defaults to 20.
        stop: Page number to stop before (exclusive, as in ``range``).
            Defaults to 23, so the default call yields pages 20, 21, 22.

    Returns:
        A list of URL strings, one per page number in ``range(start, stop)``;
        empty when ``start >= stop``.
    """
    return [
        f'https://sz.fang.anjuke.com/loupan/all/p{page}/'
        for page in range(start, stop)
    ]


def request_url(url, timeout=10):
    """Download *url* with an HTTP GET and return the body as text.

    The module-level ``headers`` dict is sent with the request. The HTTP
    status code is not checked, so an error page is returned as text just
    like a successful one.

    Args:
        url: Address to fetch.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``. Defaults to 10.

    Returns:
        The decoded response body (``response.text``).

    Raises:
        requests.exceptions.Timeout: If the server does not respond
            within ``timeout`` seconds.
    """
    # requests applies no timeout by default, so a stalled server would
    # block the whole scrape forever; always bound the wait.
    response = requests.get(url, headers=headers, timeout=timeout)
    return response.text


def parse_html(html):
    """Extract one row of listing fields per item block in *html*.

    Args:
        html: Markup of a listing page, as a string.

    Returns:
        A list of rows. Each row is a six-element list of strings in the
        order title, address, house_type, tags_wrap_1, tags_wrap_2, price.
        A field's text nodes are joined with ``|``; a field with no
        matching nodes becomes an empty string.

    The parsed rows are also printed to stdout before being returned.
    """
    tree = etree.HTML(html)
    # Absolute path to the listing container; the class value is matched
    # exactly, including its trailing space.
    items = tree.xpath('/html/body/div[2]/div[2]/div[1]/div[4]/div[@class="item-mod "]')

    # Relative XPath per output column, in column order.
    field_paths = (
        './div[@class="infos"]/a[@class="lp-name"]/span/text()',       # title
        './div[@class="infos"]/a[@class="address"]/span/text()',       # address
        './div[@class="infos"]/a[@class="huxing"]/span/text()',        # house_type
        './div[@class="infos"]/a[@class="tags-wrap"]/div/i/text()',    # tags_wrap_1
        './div[@class="infos"]/a[@class="tags-wrap"]/div/span/text()',  # tags_wrap_2
        './a[@class="favor-pos"]/p/span/text()',                       # price
    )

    data_list = [
        ['|'.join(item.xpath(path)) for path in field_paths]
        for item in items
    ]
    print(data_list)
    return data_list


def save_data(data_list):
    """Write the scraped rows to ``../static/data/info_5.csv``.

    Args:
        data_list: Rows of six values each, in the order title, address,
            house_type, tags_wrap_1, tags_wrap_2, price.

    The path is relative to the current working directory. The file is
    written without the DataFrame index, using the ``utf_8_sig`` codec
    (UTF-8 with a leading byte-order mark).
    """
    column_names = [
        'title',
        'address',
        'house_type',
        'tags_wrap_1',
        'tags_wrap_2',
        'price',
    ]
    frame = pd.DataFrame(data_list, columns=column_names)
    frame.to_csv('../static/data/info_5.csv', index=False, encoding='utf_8_sig')


# Run the scraper only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()
